Loading libraries.
library(dplyr)
library(readr)
library(keras)
library(R.matlab)
The dataset I’m using is “Stanford Cars”, which has 196 categories based on maker, model, and year of the car. For example, “2012 Tesla Model S” is one such category. There are ~8,000 images in each of the training and testing datasets (provided separately by the website), i.e. the split is about 50-50. Since that is a large number of images, I will be taking only 20 categories.
# Unpack a readMat() annotation list into a data frame of file names and
# class ids. Each annotation record flattens to 6 elements: 4 bounding-box
# coordinates, the class id (5th), and the file name (6th). R.matlab's
# readMat returns a loosely packed list (unlike rmatio's read.mat, which
# conflicts with keras' image_load), so we stride through the flattened
# vector instead of indexing fields by name. Computing the endpoint from
# length() generalizes over dataset size (no hard-coded 48864 / 48246).
annos_to_df <- function(annos) {
  flat <- unlist(annos$annotations)
  n <- length(flat)
  data.frame(fname = flat[seq(6, n, by = 6)],
             class = as.numeric(flat[seq(5, n, by = 6)]))
}

train_annos <- readMat('cars_train_annos.mat')
test_annos <- readMat('cars_test_annos_withlabels.mat')

# Class-id -> name lookup; keep only the first 20 categories.
meta <- data.frame(id = 1:196,
                   name = readMat('cars_meta.mat')$class.names %>% unlist())
meta <- meta[meta$id %in% 1:20, ]
meta$name <- as.character(meta$name)

train <- annos_to_df(train_annos)
test <- annos_to_df(test_annos)
remove(train_annos, test_annos)
# Restrict both splits to classes 1-20 and copy the corresponding image
# files into dedicated selected_train / selected_test folders.
train <- train[train$class %in% 1:20, ]   # 823 images remain
test <- test[test$class %in% 1:20, ]      # 814 images remain

copy <- lapply(paste('cars_train/', train$fname, sep = ''),
               file.copy, to = 'selected_train/')
copy <- lapply(paste('cars_test/', test$fname, sep = ''),
               file.copy, to = 'selected_test/')
remove(copy)
Now that the folder with only 20 categories is ready, let’s create the array. The images in the dataset are different sizes: 1280x949, 628x424, etc. In order to get a small standardized dataset, I will be using 224x224 pixels with RGB channels.
# Load every image at `paths` as a resized RGB array and stack them along
# the first dimension, giving (n_images, 224, 224, 3). Resizing to a fixed
# 224x224 standardizes the mixed image sizes in the dataset.
load_image_array <- function(paths, target = c(224, 224)) {
  X <- array(dim = c(length(paths), target, 3))
  for (i in seq_along(paths)) {
    X[i, , , ] <- image_load(paths[i], target_size = target) %>%
      image_to_array()
  }
  X
}

X_train <- load_image_array(paste('selected_train/', train$fname, sep = ''))
X_test <- load_image_array(paste('selected_test/', test$fname, sep = ''))

dim(X_train)
## [1] 823 224 224 3
dim(X_test)
## [1] 814 224 224 3
Now that the arrays have been generated, here is an image from each category.
# Show one example per class: the first training image of each of the 20
# selected categories, labelled with its class name in a 5x4 grid.
par(mfrow = c(5, 4))
first_of_class <- match(meta$id, train$class)
for (idx in first_of_class) {
  par(mar = rep(0, 4L))
  plot(0, 0, xlim = c(0, 1), ylim = c(0, 1), axes = FALSE, type = "n")
  rasterImage(X_train[idx, , , ] / 255, 0, 0, 1, 1)
  text(0.5, 0.1, label = meta$name[train$class[idx]], col = "red", cex = 1)
}
remove(first_of_class, idx)
Generate y dataset.
# keras' to_categorical expects zero-based labels (0..19), but the dataset
# numbers classes 1..20 — shift everything down by one, including the
# meta lookup table so ids stay aligned with the class values.
train$class <- train$class - 1
test$class <- test$class - 1
meta$id <- meta$id - 1
y_train <- to_categorical(train$class, num_classes = 20)
y_test <- to_categorical(test$class, num_classes = 20)
Run neural network model.
# Baseline CNN on raw pixels: one conv/pool stage with relu, then a 20-way
# softmax classifier. Trained for 5 epochs with SGD + momentum.
model <- keras_model_sequential() %>%
  layer_conv_2d(filters = 16, kernel_size = c(5, 5),
                input_shape = dim(X_train)[-1],
                padding = "same") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_activation(activation = "relu") %>%
  layer_flatten() %>%
  layer_dense(units = 20) %>%
  layer_activation(activation = "softmax")

model %>% compile(
  loss = 'categorical_crossentropy',
  optimizer = optimizer_sgd(lr = 0.01, momentum = 0.6),
  metrics = c('accuracy')
)
history <- model %>% fit(
  X_train, y_train,
  epochs = 5, batch_size = 32,
  validation_data = list(X_test, y_test)
)
plot(history)
The model is clearly useless — it is barely better than random guessing. With 814 test images spread over 20 categories, each category has on average 814/20 ≈ 41 images, so random guessing yields roughly 41/814 ≈ 5% accuracy per image. Let’s try transfer learning using ResNet. Before applying the model, let’s create the embedded data.
# Embed images through ResNet50's penultimate (avg_pool) layer, processing
# in batches to bound memory use. The previous version duplicated this code
# for train and test, hard-coded per-batch row counts (164/167 and 162/166),
# and allocated a zero array for Z that was immediately overwritten.
resnet50 <- application_resnet50(weights = 'imagenet', include_top = TRUE)
model_embed <- keras_model(inputs = resnet50$input,
                           outputs = get_layer(resnet50, 'avg_pool')$output)

# images: (n, 224, 224, 3) array. Returns an n x n_features matrix of
# avg_pool activations (2048 features for ResNet50).
embed_images <- function(images, num_batch = 5) {
  n <- nrow(images)
  num_cols <- model_embed$output_shape[[length(model_embed$output_shape)]]
  X <- matrix(NA_real_, nrow = n, ncol = as.numeric(num_cols))
  # Contiguous near-equal batches computed from n — works for any dataset
  # size, not just 823/814.
  batch_id <- sort(rep_len(seq_len(num_batch), n))
  for (j in seq_len(num_batch)) {
    print(sprintf("Processing batch %d of %d", j, num_batch))
    these <- which(batch_id == j)
    # drop = FALSE keeps the 4-D shape even if a batch has a single row.
    Z <- images[these, , , , drop = FALSE]
    X_temp <- predict(model_embed, x = imagenet_preprocess_input(Z),
                      verbose = TRUE)
    X[these, ] <- array(X_temp, dim = c(length(these), ncol(X)))
  }
  X
}

X_train_embed <- embed_images(X_train)
X_test_embed <- embed_images(X_test)
Now we can run a neural network using the embedded data.
# Fully-connected classifier on the ResNet embeddings: a stack of
# progressively narrower elu layers (2048 -> 32) feeding a 20-way softmax.
model <- keras_model_sequential() %>%
  layer_dense(units = 2048, input_shape = ncol(X_train_embed)) %>%
  layer_activation(activation = "elu")
for (width in c(1024, 512, 256, 128, 64, 32)) {
  model %>%
    layer_dense(units = width) %>%
    layer_activation(activation = "elu")
}
model %>%
  layer_dense(units = ncol(y_train)) %>%
  layer_activation(activation = "softmax")

# first try: RMSprop at half the default learning rate
model %>% compile(
  loss = 'categorical_crossentropy',
  optimizer = optimizer_rmsprop(lr = 0.001 / 2),
  metrics = c('accuracy')
)
history <- model %>%
  fit(X_train_embed, y_train, epochs = 10,
      validation_data = list(X_test_embed, y_test))
plot(history)

# second try: continue training with an 8x smaller learning rate
model %>% compile(
  loss = 'categorical_crossentropy',
  optimizer = optimizer_rmsprop(lr = 0.001 / 16),
  metrics = c('accuracy')
)
history <- model %>%
  fit(X_train_embed, y_train, epochs = 10,
      validation_data = list(X_test_embed, y_test))
plot(history)
That gives much better accuracy. I believe part of the remaining error is due to embedding only a sample of the data in batches, since it is possible that not enough examples of each category were embedded; this seems more likely given that the accuracy on the training set is about 99%.
# meta is ordered by class id (0..19 after the shift), so index names
# directly. The previous levels(factor(meta$name)) sorted names
# alphabetically, which silently mislabels classes whenever name order
# differs from id order (locale-dependent collation makes this fragile).
class_names <- meta$name
y <- test$class
# NOTE(review): predict_classes() was removed from keras in TF >= 2.6;
# the replacement is apply(predict(model, X_test_embed), 1, which.max) - 1.
y_pred <- predict_classes(model, X_test_embed)
mean(y == y_pred)
## [1] 0.5515971

# Off-diagonal confusion cells with non-zero counts, most-confused first.
confusion <- data.frame(table(value = class_names[y + 1L],
                              prediction = class_names[y_pred + 1L]))
confusion <- confusion[confusion$Freq != 0 &
                         confusion$value != confusion$prediction, ] %>%
  as_tibble() %>%
  arrange(desc(Freq))
confusion_5 <- data.frame(confusion[1:5, ])

# Recover the class ids of the top-5 confused (value, prediction) pairs.
ids <- vector(length = 10, mode = 'numeric')
n <- 1
for (i in 1:5) {
  for (j in 1:2) {
    ids[n] <- meta$id[which(confusion_5[i, j] == meta$name)]
    n <- n + 1
  }
}

# Plot the first test image of each confused class, paired row by row so
# each confused (true, predicted) pair sits side by side.
par(mfrow = c(5, 2))
id <- match(ids, test$class)
for (i in id) {
  par(mar = rep(0, 4L))
  plot(0, 0, xlim = c(0, 1), ylim = c(0, 1), axes = FALSE, type = "n")
  rasterImage(X_test[i, , , ] / 255, 0, 0, 1, 1)
  text(0.5, 0.1, label = meta$name[test$class[i] + 1], col = "red", cex = 2)
}
remove(id, i, ids, n, j)
Images with highest probabilities based on the trained model.
# The five classes the model predicts most often, each shown via its first
# test image, followed by their share of the 814 test predictions.
ids_with_high_p <- data.frame(table(y_pred)) %>%
  as.tbl() %>%
  arrange(desc(Freq)) %>%
  slice(1:5) %>%
  data.frame()

par(mfrow = c(3, 2))
# y_pred here is a factor; match() compares it against the numeric
# test$class via character coercion, which works because the factor
# levels are the class numbers themselves.
id <- match(ids_with_high_p$y_pred, test$class)
for (i in id) {
  par(mar = rep(0, 4L))
  plot(0, 0, xlim = c(0, 1), ylim = c(0, 1), axes = FALSE, type = "n")
  rasterImage(X_test[i, , , ] / 255, 0, 0, 1, 1)
  text(0.5, 0.1, label = meta$name[test$class[i] + 1], col = "red", cex = 2)
}

ids_with_high_p$Freq / 814
## [1] 0.07493857 0.06019656 0.05896806 0.05773956 0.05773956
Visualization using PCA.
library(ggplot2)
# Visualize the embedding space: project the test embeddings onto their
# first two principal components and colour each point by its true class.
pc_scores <- prcomp(X_test_embed)$x[, 1:2]
pca <- as_tibble(pc_scores)
pca$y <- class_names[y + 1L]
ggplot(pca, aes(PC1, PC2, color = y)) +
  geom_point(alpha = 0.2, size = 7) +
  labs(x = "", y = "", color = "class") +
  theme_minimal()